package com.adu.music.parser;

import com.adu.music.bean.PlaylistCategory;
import com.adu.music.util.CommonUtils;
import com.adu.music.util.JsoupUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.*;
import java.util.stream.Collectors;

/**
 * @author duchuanchuan
 * @date 2016/12/12
 */
public class PlaylistParser {
    private static final Logger logger = LoggerFactory.getLogger(PlaylistParser.class);

    /** Scheme and host prepended to the relative links found in the scraped pages. */
    private static final String BASE_URL = "http://music.163.com";

    /** A playlist is kept only if its comment, share and favorite counts all reach this value. */
    private static final int MIN_ENGAGEMENT = 10;

    /**
     * Parses one page of the "hot" playlist listing for a category and returns the
     * playlists whose comment, share and favorite counts are all at least
     * {@link #MIN_ENGAGEMENT}.
     *
     * <p>The listing URL has the form
     * {@code http://music.163.com/discover/playlist/?order=hot&cat=<name>&limit=35&offset=<offset>}.
     * The category name is URL-encoded (UTF-8) so that names containing characters such as
     * {@code &}, {@code /} or spaces cannot corrupt the query string.
     *
     * @param category supplies the category name and the paging offset
     * @return the playlists that passed the filter; each map uses the keys written by
     *         {@link #parsePlaylistById(String)}; empty if the listing page could not be loaded
     * @throws IOException if the category name cannot be encoded; other (unchecked) exceptions
     *         may still escape from the underlying fetch, so callers may want a broader catch
     */
    public static List<Map<String, Object>> parsePlaylists(PlaylistCategory category) throws IOException {
        // String.valueOf keeps the original "null" rendering instead of failing inside the encoder.
        String encodedName = URLEncoder.encode(String.valueOf(category.getName()), "UTF-8");
        String url = BASE_URL + "/discover/playlist/?order=hot&cat=" + encodedName
                + "&limit=35&offset=" + category.getOffset();
        List<Map<String, Object>> list = new ArrayList<>();
        Optional<Document> docOptional = JsoupUtils.get(url);
        docOptional.ifPresent(doc -> {
            for (Element item : doc.select("#m-pl-container li")) {
                Elements anchors = item.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    // A list item without a link cannot be followed; skip it instead of failing the page.
                    continue;
                }
                String playListUrl = BASE_URL + anchors.get(0).attr("href");
                // TODO: skip playlists that were already crawled before fetching their detail page.
                Map<String, Object> playList = parsePlayList(playListUrl);
                if (playList.isEmpty()) {
                    continue;
                }
                if (isPopularEnough(playList)) {
                    list.add(playList);
                }
            }
        });
        return list;
    }

    /**
     * Tells whether a parsed playlist reaches the minimum comment, share and favorite counts.
     *
     * @param playList a map produced by {@link #parsePlayList(String)}
     * @return {@code true} if all three counters are at least {@link #MIN_ENGAGEMENT}
     */
    private static boolean isPopularEnough(Map<String, Object> playList) {
        return intValue(playList, "comment") >= MIN_ENGAGEMENT
                && intValue(playList, "share") >= MIN_ENGAGEMENT
                && intValue(playList, "favorite") >= MIN_ENGAGEMENT;
    }

    /**
     * Reads a numeric entry from a playlist map without risking an unboxing failure.
     *
     * @param playList the map to read from
     * @param key the entry name
     * @return the entry as an {@code int}, or 0 when it is missing or not a number
     */
    private static int intValue(Map<String, Object> playList, String key) {
        Object value = playList.get(key);
        return value instanceof Number ? ((Number) value).intValue() : 0;
    }

    /**
     * Fetches and parses a single playlist page.
     *
     * @param url the playlist URL; the playlist id is taken from the text after {@code id=}
     * @return the playlist fields ({@code id}, {@code name}, {@code user_id}, {@code user_name},
     *         {@code category}, {@code favorite}, {@code share}, {@code comment}, {@code play},
     *         {@code description}), or an empty map if the page could not be loaded or a
     *         required element is missing
     */
    private static Map<String, Object> parsePlayList(String url) {
        Map<String, Object> pl = new HashMap<>();
        Optional<Document> docOptional = JsoupUtils.get(url);
        docOptional.ifPresent((Document doc) -> fillPlaylist(doc, url, pl));
        return pl;
    }

    /**
     * Extracts the playlist fields from a loaded page into {@code pl}. Nothing is written
     * unless every required element is present, so the map is either complete or untouched.
     *
     * @param doc the loaded playlist page
     * @param url the page URL, used for the id and for log messages
     * @param pl the map that receives the parsed fields
     */
    private static void fillPlaylist(Document doc, String url, Map<String, Object> pl) {
        if (StringUtils.contains(doc.html(), "网页找不到")) {
            logger.info("你要查找的网页找不到,url:{}", url);
            return;
        }
        // Playlist id
        String id = StringUtils.substringAfter(url, "id=");
        Elements minfos = doc.getElementsByClass("m-info");
        if (minfos.isEmpty()) {
            logger.info("URL:{}没有m-info", url);
            return;
        }
        Element minfo = minfos.get(0);

        // Playlist name
        Elements h2s = minfo.select(".tit h2");
        if (h2s.isEmpty()) {
            logger.info("抓取歌单名称异常:{}", url);
            return;
        }
        String name = h2s.get(0).text();

        // Creator: the link inside ".user .name" carries both the display name and the user id.
        Elements userElements = minfo.select(".user .name");
        Elements userAnchors = userElements.isEmpty()
                ? userElements
                : userElements.get(0).getElementsByTag("a");
        if (userAnchors.isEmpty()) {
            logger.info("抓取歌单信息用户异常，url:{}", url);
            return;
        }
        Element user = userAnchors.get(0);
        String userName = user.text();
        String userId = StringUtils.substringAfter(user.attr("href"), "id=");

        // Operation bar: getElementById returns null when the element is absent.
        Element oprElement = minfo.getElementById("content-operation");
        Elements oprAnchors = oprElement == null ? null : oprElement.getElementsByTag("a");
        if (oprAnchors == null || oprAnchors.size() < 6) {
            logger.info("抓取歌单信息操作区异常,url:{}", url);
            return;
        }

        // Tags, joined with commas (empty string when the playlist has none)
        List<String> tags = new ArrayList<>();
        for (Element te : minfo.select(".tags .u-tag")) {
            Elements labels = te.getElementsByTag("i");
            if (!labels.isEmpty()) {
                tags.add(labels.get(0).text());
            }
        }
        String category = String.join(",", tags);

        // Favorite and share counts are read from the 3rd and 4th links of the operation bar.
        String favorite = oprAnchors.get(2).attr("data-count");
        String share = oprAnchors.get(3).attr("data-count");

        // Comment count
        Element commentElement = minfo.getElementById("cnt_comment_count");
        if (commentElement == null) {
            logger.info("抓取歌单评论数异常,url:{}", url);
            return;
        }
        String comment = commentElement.text();
        if (StringUtils.equals(comment, "评论")) {
            // The element shows only the label instead of a number; such playlists are skipped.
            return;
        }

        // Description (optional)
        Elements intros = minfo.select(".intr");
        String description = intros.isEmpty() ? "" : intros.get(0).text();

        // Play count
        Element playElement = doc.getElementById("play-count");
        if (playElement == null) {
            logger.info("抓取歌单播放次数异常,url:{}", url);
            return;
        }
        String play = playElement.text();

        pl.put("id", CommonUtils.parseInt(id));
        pl.put("name", name);
        pl.put("user_id", CommonUtils.parseInt(userId));
        pl.put("user_name", userName);
        pl.put("category", category);
        pl.put("favorite", CommonUtils.parseInt(favorite));
        pl.put("share", CommonUtils.parseInt(share));
        pl.put("comment", CommonUtils.parseInt(comment));
        pl.put("play", CommonUtils.parseInt(play));
        pl.put("description", description);
        logger.info("抓取歌单,id:{},name:{},favorite:{},share:{},comment:{}", id, name, favorite, share, comment);
    }

    /**
     * Fetches and parses the playlist with the given id.
     *
     * @param playlistId the playlist id
     * @return the playlist fields, or an empty map if the page could not be loaded or parsed
     */
    public static Map<String, Object> parsePlaylistById(String playlistId) {
        return parsePlayList(BASE_URL + "/playlist?id=" + playlistId);
    }

    /**
     * Collects the ids of the songs listed on a playlist page.
     *
     * @param playlistId the playlist id
     * @return the song ids in page order; empty if the page could not be loaded or has no song list
     */
    public static List<String> parseSongIds(String playlistId) {
        String url = BASE_URL + "/playlist?id=" + playlistId;
        Optional<Document> docOptional = JsoupUtils.get(url);
        List<String> ids = new ArrayList<>();
        docOptional.ifPresent((Document doc) -> {
            Element songList = doc.getElementById("song-list-pre-cache");
            if (songList == null) {
                // getElementById returns null when the page has no pre-rendered song list.
                logger.info("URL:{}没有song-list-pre-cache", url);
                return;
            }
            for (Element li : songList.getElementsByTag("li")) {
                Elements anchors = li.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    continue;
                }
                String songUrl = anchors.get(0).attr("href");
                String id = StringUtils.substringAfter(songUrl, "id=");
                if (StringUtils.isNotBlank(id)) {
                    ids.add(id);
                }
            }
        });
        return ids;
    }
}
